* Session setup: start from an empty session, switch off output paging and
* make double the default storage type for every variable created below.
clear all
set more off, permanently
set type double
* NOTE(review): the path is empty here; presumably it is meant to be filled in
* with the replication folder before running - confirm.
cd ""

* Log file
* Both the log opened below and the outreg2 spreadsheets written later in this
* file go to log_files/. Create the folder first so a fresh checkout does not
* abort on "file could not be opened"; capture makes this a no-op when the
* folder already exists.
capture mkdir "log_files"
capture log close
global date "`c(current_date)'"
log using "log_files/tables_appendix_$date.txt", text replace

********************************************************************************
********* TABLE A1: Predicting Recall Elections  *******************************
********************************************************************************

use "base_main.dta", clear 

	* Covariate sets; each column of the table adds the next set to the previous one
	local politics "win_margin turnout candidates_NR"
	local schooling "Uni Tec Sec2"
	local background "age female work_public work_private yrs_elected yrs_partyoffice yrs_mayor nationalparty"

	* Restrict to the observations usable in the richest specification, so
	* that all three columns are estimated on the same sample
	quiet reg revocatoria `politics' `schooling' `background'
	keep if e(sample)==1

	xtset district_id

	* Columns 1-3: district fixed effects and year dummies, SEs clustered on district_year
	local rhs ""
	foreach set in politics schooling background {
		local rhs "`rhs' ``set''"
		xtreg revocatoria `rhs' i.year, fe cluster(district_year) nonest
	}
	
********************************************************************************
********* TABLE A2: Predicting Recall Attempts  ********************************
********************************************************************************

use "base_main.dta", clear 

	* Covariate sets; each column of the table adds the next set to the previous one
	local politics "win_margin turnout candidates_NR"
	local schooling "Uni Tec Sec2"
	local background "age female work_public work_private yrs_elected yrs_partyoffice yrs_mayor nationalparty"

	* The estimation sample is fixed with the same auxiliary regression of
	* revocatoria on the full covariate list that Table A1 uses
	quiet reg revocatoria `politics' `schooling' `background'
	keep if e(sample)==1

	xtset district_id

	* Columns 1-3: outcome is kits; district fixed effects and year dummies
	local rhs ""
	foreach set in politics schooling background {
		local rhs "`rhs' ``set''"
		xtreg kits `rhs' i.year, fe cluster(district_year) nonest
	}

********************************************************************************
*************** TABLE A4: Descriptive Statistics *******************************
********************************************************************************

use "base_main.dta", clear 

set matsize 3000

* Candidate-level variables summarised in the first two panels
local cand_vars "Prim2 Sec2 Tec Uni yrs_edu yrs_elected yrs_partyoffice yrs_mayor nationalparty work_public work_private age female ind ind2"

order `cand_vars'
* Rescale the share by 100 (it is labelled as a percentage below)
replace CS07_per_ind_qa=(CS07_per_ind_qa*100) 
gen ENP=1/sqr_share_valid

* Labels for the candidate-level panels (picked up by outreg2's label option)
label var ind "At least one native surname"
label var ind2 "Two native surnames"

*- Gen winner
gen	election_winner =0
replace election_winner=1 if ELEGIDO=="SI" & inlist(year,2006,2010,2014)
replace election_winner=1 if winner==1 & year==2002

// Winners' characteristics:
	
	summ `cand_vars' if election_winner==1 	
	outreg2 using "log_files/summ1.xls" if election_winner==1 , replace ctitle(Winners-Full) sum(log) dec(3) keep(`cand_vars') eqkeep(N mean) label

// Candidates' characteristics:

	summ `cand_vars'	
	outreg2 using "log_files/summ1.xls" , append ctitle(Candidates-Full) sum(log) dec(3) keep(`cand_vars') eqkeep(N mean) label
	
// District characteristics:
	
	* One row per district-year
	collapse (mean) win_margin turnout candidates_NR ENP CS07_per_ind_qa log_pim_ultimos3 log_ejec_ultimos3, by(district_id year)
	order candidates_NR win_margin ENP turnout log_pim_ultimos3 log_ejec_ultimos3 CS07_per_ind_qa

	* collapse replaces every variable label with "(mean) varname", so the
	* descriptive labels must be attached AFTER collapsing; otherwise the
	* label option of outreg2 below exports the generic "(mean) ..." text.
	label var candidates_NR "Number of Candidates"    
	label var win_margin "Win Margin (%)"
	label var ENP "Eff. Number of candidates"
	label var turnout "Turnout (%)"
	label var log_pim_ultimos3 "Ln(Revenues in N. Soles)"
	label var log_ejec_ultimos3 "Ln(Expenditures in N. Soles)"
	label var CS07_per_ind_qa "Native mother tongue (%)"
	
	summ candidates_NR win_margin ENP turnout log_pim_ultimos3 log_ejec_ultimos3 CS07_per_ind_qa
	outreg2 using "log_files/summ2.xls", replace ctitle(District-Full) sum(log) dec(3) keep(candidates_NR win_margin ENP turnout CS07_per_ind_qa log_pim_ultimos3 log_ejec_ultimos3) eqkeep(N mean) label

********************************************************************************
*** TABLE A5: Accountability and Candidates' Education - Correlation ***********
********************************************************************************

use "base_main.dta", clear 

global education "yrs_edu Uni Tec Sec2"	
global political "turnout_lag win_margin_lag candidates_NR_lag"

*- Treatment variable:
	* 1 when the lagged recall vote share exceeds one half, 0 otherwise,
	* missing when the share itself is missing
	gen treat_1=(voterecall_lag>.5) if voterecall_lag!=.
	tab treat_1
	
	* Same indicator, but set to 0 whenever it is not 1 and revocatoria_lag is non-missing
	gen treat_2=treat_1
	replace treat_2=0 if treat_2!=1 & (revocatoria_lag!=.)
	tab treat_2
	
*- Table 

	* First pass  (treat_2)         = PANEL A: Recalled incumbent
	* Second pass (revocatoria_lag) = PANEL B: Recall referendum
	foreach regressor in treat_2 revocatoria_lag {
		foreach var of varlist $education  {
			reg `var' `regressor' $political, cluster(district_year) 
				di "Observations and mean dep."
				* Flag the estimation sample of the regression just run
				capture drop n_`var'
				gen n_`var'=e(sample)
				sum `var' if n_`var'==1		
		}
	}
	
********************************************************************************
*** TABLE A6: Accountability and Candidates' Education - Specification Checks **
********************************************************************************

use "base_main.dta", clear 
global education "yrs_edu Uni Tec Sec2"	

*- Treatment variable:
	* 1 when the lagged recall vote share exceeds one half, 0 otherwise
	gen treat_1=(voterecall_lag>.5) if voterecall_lag!=.
	tab treat_1
	
*- Polynomials:	
	gen running_a2=voterecall_lag*voterecall_lag
	gen running_a3=voterecall_lag*voterecall_lag*voterecall_lag
	gen running_a4=voterecall_lag*voterecall_lag*voterecall_lag*voterecall_lag

*- Defining optimal bandwidths (Imbens-Kalyanaraman 2012):
	* Hard-coded half-widths, one per outcome; the window is 0.5 -/+ half-width
	local h_yrs_edu 0.137498229198215
	local h_Uni 0.1577362629911949
	local h_Tec 0.118662983718237
	local h_Sec2 0.1376469624331328

	foreach y of global education {
		gen bw_below_`y'=0.5-`h_`y''
	}
	foreach y of global education {
		gen bw_above_`y'=0.5+`h_`y''
	}
		
*- Table

	* Polynomial terms added to the linear running variable, by degree
	local poly3 "running_a2 running_a3"
	local poly4 "running_a2 running_a3 running_a4"

	foreach var of varlist $education {

		local window "bw_below_`var'<=voterecall_lag & bw_above_`var'>=voterecall_lag"

		* deg==3 -> PANEL A: Cubic Polynomial Regression
		* deg==4 -> PANEL B: Quartic Polynomial Regression
		forvalues deg = 3/4 {
			reg `var' treat_1 voterecall_lag `poly`deg'' if `window', cluster(district_year)
				capture drop n_`var'
				di "Observations and mean dep."
				gen n_`var'=e(sample)
				sum `var' if n_`var'==1		
				di "Number of clusters"
				di e(N_clust) 		
		}
	}

********************************************************************************
** TABLE A7: Accountability and Candidates' Education - Calonico et al. (2014) *
********************************************************************************

use "base_main.dta", clear 
global education "yrs_edu Uni Tec Sec2"	

*- Treatment variable:
	* 1 when the lagged recall vote share exceeds one half, 0 otherwise
	gen treat_1=(voterecall_lag>.5) if voterecall_lag!=.
	tab treat_1
	
*- Polynomials:	
	gen running_a2=voterecall_lag*voterecall_lag
	gen running_a3=voterecall_lag*voterecall_lag*voterecall_lag
	gen running_a4=voterecall_lag*voterecall_lag*voterecall_lag*voterecall_lag

*- Calonico bandwidths 	
	
	foreach var of varlist $education  {
		rdrobust `var' voterecall_lag, c(0.5) p(2) vce(cluster district_year)
	}
	
	* Hard-coded half-widths used for the polynomial regressions below
	* NOTE(review): presumably rounded from the rdrobust output above - confirm
	local h_yrs_edu 0.139
	local h_Uni 0.126
	local h_Tec 0.105
	local h_Sec2 0.149

	foreach y of global education {
		gen bw_below_`y'=0.5-`h_`y''
	}
	foreach y of global education {
		gen bw_above_`y'=0.5+`h_`y''
	}
 
*- Table

	* Extra polynomial terms by degree (degree 1 adds nothing)
	local poly1 ""
	local poly2 "running_a2"

	foreach var of varlist $education  {
	
		*- PANEL A: Bias-Corrected RD Estimates
		rdrobust `var' voterecall_lag, c(0.5) p(2) all vce(cluster district_year)

		local window "bw_below_`var'<=voterecall_lag & bw_above_`var'>=voterecall_lag"

		* deg==1 -> PANEL B: Linear Polynomial Regression
		* deg==2 -> PANEL C: Quadratic Polynomial Regression
		forvalues deg = 1/2 {
			reg `var' treat_1 voterecall_lag `poly`deg'' if `window', cluster(district_year)
				capture drop n_`var'
				di "Observations and mean dep."
				gen n_`var'=e(sample)
				sum `var' if n_`var'==1		
				di "Number of clusters"
				di e(N_clust) 		
		}
	}

/* NOTE: The number of observations, the mean of the dependent variable and the
number of clusters need to be inserted from the polynomial regressions */

********************************************************************************
*** TABLE A8: Accountability and Candidates’ Education - Arbitrary Bandwidth ***
********************************************************************************

use "base_main.dta", clear 
global education "yrs_edu Uni Tec Sec2"	

*- Treatment variable:
	* 1 when the lagged recall vote share exceeds one half, 0 otherwise
	gen treat_1=(voterecall_lag>.5) if voterecall_lag!=.
	tab treat_1
	
*- Polynomials:	
	gen running_a2=voterecall_lag*voterecall_lag
	gen running_a3=voterecall_lag*voterecall_lag*voterecall_lag
	gen running_a4=voterecall_lag*voterecall_lag*voterecall_lag*voterecall_lag

*- Table

	* Extra polynomial terms by degree (degree 1 adds nothing)
	local poly1 ""
	local poly2 "running_a2"
	local poly3 "running_a2 running_a3"

	foreach var of varlist $education {
	
		* Same fixed window [.47, .53] around the cutoff for every outcome
		* deg==1 -> PANEL A: Linear Polynomial Regression
		* deg==2 -> PANEL B: Quadratic Polynomial Regression
		* deg==3 -> PANEL C: Cubic Polynomial Regression
		forvalues deg = 1/3 {
			reg `var' treat_1 voterecall_lag `poly`deg'' if inrange(voterecall_lag,.47,.53), cluster(district_year)
				capture drop n_`var'
				di "Observations and mean dep."
				gen n_`var'=e(sample)
				sum `var' if n_`var'==1		
				di "Number of clusters"
				di e(N_clust) 		
		}
	}

********************************************************************************
**** TABLE A9: Mayor Characteristics and Policy Outcomes  **********************
********************************************************************************

use "base_main.dta", clear 

global education "Sec2 Tec Uni"	
global experience "yrs_elected yrs_partyoffice yrs_mayor"
global other "nationalparty work_public work_private age female"

*- Treatment variable:
	* 1 when the lagged recall vote share exceeds one half, 0 otherwise
	gen treat_1=(voterecall_lag>.5) if voterecall_lag!=.
	tab treat_1
	
*- Polynomials:	
	gen running_a2=voterecall_lag*voterecall_lag
	gen running_a3=voterecall_lag*voterecall_lag*voterecall_lag
	gen running_a4=voterecall_lag*voterecall_lag*voterecall_lag*voterecall_lag

*- Sample of winners
	* Winner flag: ELEGIDO=="SI" for 2006/2010/2014, winner==1 for 2002
	gen election_winner=(ELEGIDO=="SI" & inlist(year,2006,2010,2014)) | (winner==1 & year==2002)
	keep if election_winner==1
	
	* First pass:  Ln(Expenditures) (log_ejec_ultimos3)
	* Second pass: Ln(Revenues)     (log_pim_ultimos3)
	foreach outcome in log_ejec_ultimos3 log_pim_ultimos3 {

		global depvar "`outcome'" 	
		xtset district_id

		*-- Col1: Education dummies
		xtreg $depvar $education $experience $other i.year, fe vce(cluster district_year) nonest i(district_id) 

		*-- Col2: Years of education
		xtreg $depvar yrs_edu $experience $other i.year, fe vce(cluster district_year) nonest i(district_id) 
	}

********************************************************************************
***** TABLE A10: Continuity Tests: Incumbent’s Education and Experience ********
********************************************************************************

use "base_main.dta", clear 

global education "yrs_edu Uni Tec Sec2"	
global characteristics "work_public work_private age female"	
global experience "yrs_elected yrs_mayor yrs_partyoffice nationalparty"

* Keep election winners only: ELEGIDO=="SI" for 2006/2010/2014, winner==1 for 2002
gen election_winner=(ELEGIDO=="SI" & inlist(year,2006,2010,2014)) | (winner==1 & year==2002)
keep if election_winner==1

*- Defining optimal bandwidths (Imbens-Kalyanaraman 2012):
	* Hard-coded half-widths, one per outcome; the window is 0.5 -/+ half-width

	local h_yrs_edu .222680304667832
	local h_Uni .1353260761376759
	local h_Tec .1129019853742634
	local h_Sec2 .2237496236085149

	local h_yrs_elected .1327645504440371
	local h_yrs_partyoffice .1447134151559589
	local h_yrs_mayor .1237508882165855
	local h_age .1141666158454647
	local h_female .1343368662015998
	local h_nationalparty .0912923222037916
	local h_work_public .1614229397621897
	local h_work_private .1261667253612351

	foreach y of global education {
		gen bw_below_`y'=0.5-`h_`y''
	}
	foreach y of global education {
		gen bw_above_`y'=0.5+`h_`y''
	}

	local non_edu "yrs_elected yrs_partyoffice yrs_mayor age female nationalparty work_public work_private"
	foreach y of local non_edu {
		gen bw_below_`y'=0.5-`h_`y''
	}
	foreach y of local non_edu {
		gen bw_above_`y'=0.5+`h_`y''
	}
	
*- Treatment
	* Built from the contemporaneous share (voterecall), not the lag:
	* 1 when it exceeds one half, 0 otherwise
	gen treat_inc=(voterecall>.5) if voterecall!=.
	tab treat_inc	

*- Table

	foreach var of varlist $education $experience $characteristics {
	
		*- Local Linear Regression
		rd `var' treat_inc voterecall, z0(0.5) mbw(100) 
			estadd local Imbens "Yes", replace 	
			estadd ysumm
			
		*- Linear Polynomial Regression (for observations and mean dep.)
		local window "bw_below_`var'<=voterecall & bw_above_`var'>=voterecall"
		reg `var' treat_inc voterecall if `window', vce(robust)
			di "Observations and mean dep."
			gen n_`var'=e(sample)
			sum `var' if n_`var'==1		
			* This regression is not clustered, so e(N_clust) is not
			* posted and the display below prints a missing value
			di "Number of clusters"
			di e(N_clust) 		
	}
	
/* NOTE: The number of observations, the mean of the dependent variable and the
number of clusters need to be inserted from the polynomial regressions */
	
********************************************************************************
***** TABLE A11: Robustness - Mechanisms: Performance in Office ****************
********************************************************************************

use "base_main.dta", clear 
global education "yrs_edu Uni Tec Sec2"	

	*- Demeaned variables: subtract the full-sample mean from each fiscal measure
	foreach fiscal of varlist RealisedExpenses log_pim_ultimos3 log_ejec_ultimos3 {
		egen m_`fiscal'=mean(`fiscal')
		generate dem_`fiscal'=`fiscal'-m_`fiscal'
	}

*- Treatment variable:
	* 1 when the lagged recall vote share exceeds one half, 0 otherwise
	gen treat_1=(voterecall_lag>.5) if voterecall_lag!=.
	tab treat_1
	
*- Polynomials:	
	gen running_a2=voterecall_lag*voterecall_lag
	gen running_a3=voterecall_lag*voterecall_lag*voterecall_lag
	gen running_a4=voterecall_lag*voterecall_lag*voterecall_lag*voterecall_lag

*- Interaction terms: treatment x each moderator
	foreach moderator of varlist previouselection dem_RealisedExpenses dem_log_pim_ultimos3 dem_log_ejec_ultimos3 {	
		gen treated_`moderator'=treat_1*`moderator'
	}

*- Defining optimal bandwidths (Imbens-Kalyanaraman 2012):
	* Hard-coded half-widths, one per outcome; the window is 0.5 -/+ half-width
	local h_yrs_edu 0.137498229198215
	local h_Uni 0.1577362629911949
	local h_Tec 0.118662983718237
	local h_Sec2 0.1376469624331328

	foreach y of global education {
		gen bw_below_`y'=0.5-`h_`y''
	}
	foreach y of global education {
		gen bw_above_`y'=0.5+`h_`y''
	}

*- Table
	
	foreach var of varlist $education  {

		local window "voterecall_lag>=bw_below_`var' & voterecall_lag<=bw_above_`var'"
	
		* log_pim_ultimos3  -> PANEL A: Performance prior Recall – Revenues
		* log_ejec_ultimos3 -> PANEL B: Performance prior Recall – Expenditures
		foreach perf in log_pim_ultimos3 log_ejec_ultimos3 {
			reg `var' treat_1 treated_dem_`perf' dem_`perf' voterecall_lag l_population if `window', cluster(district_year)
				di "Observations and mean dep."
				capture drop n_`var'
				gen n_`var'=e(sample)
				sum `var' if n_`var'==1		
				di "Number of clusters"
				di e(N_clust) 			
		}
	}	
	
		
********************************************************************************
*** TABLE A12: Accountability and Candidates' Education - Recalled Neighbours **
********************************************************************************

use "base_neighbours.dta", clear
global education "yrs_edu Uni Tec Sec2"	

	*- Gen neighbour's recall share in t-1 
	* Distance of each neighbour's share from the 0.5 cutoff
	foreach nb of varlist vecino1_rs-vecino10_rs {
		gen `nb'_dif=abs(`nb'-.5)
	}

	* Smallest distance across neighbours: min |voterecall_lag_vecino_i - 0.5|
	* (relevant when more than one neighbour had a recall)
	egen voterecall_lag_vecino_id=rowmin(vecino1_rs_dif-vecino10_rs_dif)

	* Running variable = share of the neighbour attaining that smallest distance;
	* if several neighbours tie, the last one in the varlist is the one kept
	gen voterecall_lag_vecino=.
	foreach nb of varlist vecino1_rs-vecino10_rs {
		replace voterecall_lag_vecino=`nb' if voterecall_lag_vecino_id==`nb'_dif 
	}

	*- Gen treatment var
	* Defined only where the district's own voterecall_lag is missing
	gen treat_vecino=(voterecall_lag_vecino>.5) if voterecall_lag_vecino!=. & voterecall_lag==.
	replace voterecall_lag_vecino=. if voterecall_lag!=. 

	*- Polynomials:
	gen running_vec2 = voterecall_lag_vecino^2

*- Defining optimal bandwidths (Imbens-Kalyanaraman 2012):
	* Hard-coded half-widths, one per outcome; the window is 0.5 -/+ half-width
	local h_yrs_edu 0.0502318703919378
	local h_Uni 0.0485165897330795
	local h_Tec 0.1027995031937449
	local h_Sec2 0.0452631239911648

	foreach y of global education {
		gen bw_below_`y'=0.5-`h_`y''
	}
	foreach y of global education {
		gen bw_above_`y'=0.5+`h_`y''
	}

*- Table

	* Extra polynomial terms by degree (degree 1 adds nothing)
	local poly1 ""
	local poly2 "running_vec2"

	foreach var of varlist $education {

		local window "bw_below_`var'<=voterecall_lag_vecino & bw_above_`var'>=voterecall_lag_vecino"
	
		* deg==1 -> PANEL A: Linear Polynomial Regression 
		* deg==2 -> PANEL B: Quadratic Polynomial Regression
		forvalues deg = 1/2 {
			reg `var' treat_vecino voterecall_lag_vecino `poly`deg'' if `window', cluster(district_year)
				capture drop n_`var'
				di "Observations and mean dep."
				gen n_`var'=e(sample)
				sum `var' if n_`var'==1		
				di "Number of clusters"
				di e(N_clust) 
		}
	}
	
********************************************************************************
*********** TABLE A13: Robustness for Recalled Neighbours **********************
********************************************************************************

use "base_neighbours.dta", clear
global education "yrs_edu Uni Tec Sec2"	

	*- Gen neighbour's recall share in t-1 
	* Distance of each neighbour's share from the 0.5 cutoff
	foreach nb of varlist vecino1_rs-vecino10_rs {
		gen `nb'_dif=abs(`nb'-.5)
	}

	* Smallest distance across neighbours: min |voterecall_lag_vecino_i - 0.5|
	* (relevant when more than one neighbour had a recall)
	egen voterecall_lag_vecino_id=rowmin(vecino1_rs_dif-vecino10_rs_dif)

	* Running variable = share of the neighbour attaining that smallest distance;
	* if several neighbours tie, the last one in the varlist is the one kept
	gen voterecall_lag_vecino=.
	foreach nb of varlist vecino1_rs-vecino10_rs {
		replace voterecall_lag_vecino=`nb' if voterecall_lag_vecino_id==`nb'_dif 
	}

	*- Identify districts with conflicting signals
	* Per-neighbour indicator: 1 if that neighbour's share is above 0.5
	foreach nb of varlist vecino1_rs-vecino10_rs {
		gen `nb'_s=(`nb'>.5) if `nb'!=.
	}

	* NOTE(review): only the first and the last non-missing neighbour are
	* compared, so a pattern such as 1,0,1 yields s_dif==0 and is not flagged.
	* Confirm this is intended before switching to a rowmin/rowmax comparison;
	* the hard-coded bandwidths below depend on the current sample.
	egen s_first=rowfirst(vecino1_rs_s-vecino10_rs_s)
	egen s_last=rowlast(vecino1_rs_s-vecino10_rs_s)
	gen s_dif=abs(s_first-s_last) // 1 if conflicting signal

	*- Gen treatment var
	* Defined only where the district's own voterecall_lag is missing and s_dif==0
	gen treat_vecino=(voterecall_lag_vecino>.5) if voterecall_lag_vecino!=. & voterecall_lag==. & s_dif==0
	replace voterecall_lag_vecino=. if voterecall_lag!=. 

	*- Polynomials:
	gen running_vec2 = voterecall_lag_vecino^2
	
*- Defining optimal bandwidths (Imbens-Kalyanaraman 2012):
	* Hard-coded half-widths, one per outcome; the window is 0.5 -/+ half-width
	local h_yrs_edu 0.070023559995764
	local h_Uni 0.072705724457935
	local h_Tec 0.1175733358201326
	local h_Sec2 0.061504754397355

	foreach y of global education {
		gen bw_below_`y'=0.5-`h_`y''
	}
	foreach y of global education {
		gen bw_above_`y'=0.5+`h_`y''
	}
	
*- Table

	* Extra polynomial terms by degree (degree 1 adds nothing)
	local poly1 ""
	local poly2 "running_vec2"

	foreach var of varlist $education {
		
		*- PANEL A: Local Linear Regression
		rd `var' treat_vecino voterecall_lag_vecino, z0(0.5) mbw(100) cluster(district_year) 

		local window "bw_below_`var'<=voterecall_lag_vecino & bw_above_`var'>=voterecall_lag_vecino"

		* deg==1 -> PANEL B: Linear Polynomial Regression 
		* deg==2 -> PANEL C: Quadratic Polynomial Regression
		forvalues deg = 1/2 {
			reg `var' treat_vecino voterecall_lag_vecino `poly`deg'' if `window', cluster(district_year)
				capture drop n_`var'
				di "Observations and mean dep."
				gen n_`var'=e(sample)
				sum `var' if n_`var'==1		
				di "Number of clusters"
				di e(N_clust) 
		}
	}

/* NOTE: The number of observations, the mean of the dependent variable and the
number of clusters need to be inserted from the polynomial regressions */

********************************************************************************
*************** TABLE A14: Robustness Checks  **********************************
********************************************************************************

use "base_main.dta", clear 

global education "yrs_edu Uni Tec Sec2"	
global mayor_characteristics "inc_Uni inc_Tec inc_Sec2 inc_Prim2 inc_age inc_female inc_nationalparty inc_work_public inc_work_private inc_yrs_elected inc_yrs_partyoffice inc_yrs_mayor"			

*- Treatment variable:
	* 1 when the lagged recall vote share exceeds one half, 0 otherwise
	gen treat_1=(voterecall_lag>.5) if voterecall_lag!=.
	tab treat_1
	
*- Polynomials:	
	gen running_a2=voterecall_lag*voterecall_lag
	gen running_a3=voterecall_lag*voterecall_lag*voterecall_lag
	gen running_a4=voterecall_lag*voterecall_lag*voterecall_lag*voterecall_lag

*- Defining optimal bandwidths (Imbens-Kalyanaraman 2012):
	* Hard-coded half-widths, one per outcome; the window is 0.5 -/+ half-width
	local h_yrs_edu 0.137498229198215
	local h_Uni 0.1577362629911949
	local h_Tec 0.118662983718237
	local h_Sec2 0.1376469624331328

	foreach y of global education {
		gen bw_below_`y'=0.5-`h_`y''
	}
	foreach y of global education {
		gen bw_above_`y'=0.5+`h_`y''
	}

*- Table

	* Control sets by panel
	*   A: Controlling for Incumbent's Characteristics
	*   B: Controlling for Political Situation in t-1
	local ctrl_A "$mayor_characteristics"
	local ctrl_B "turnout_lag win_margin_lag candidates_NR_lag population"

	foreach var of varlist $education {	

		local window "voterecall_lag>=bw_below_`var' & voterecall_lag<=bw_above_`var'"

		foreach panel in A B {

			*- Local linear RD with the panel's covariates
			rd `var' treat_1 voterecall_lag, z0(0.5) mbw(100) covar(`ctrl_`panel'') cluster(district_year) 

			*-- Linear Polynomial Regression (for observations and mean dep.)
			reg `var' treat_1 voterecall_lag `ctrl_`panel'' if `window', cluster(district_year)
				capture drop n_`var'
				di "Observations and mean dep."
				gen n_`var'=e(sample)
				sum `var' if n_`var'==1		
				di "Number of clusters"
				di e(N_clust) 
		}
	}

/* NOTE: The number of observations, the mean of the dependent variable and the
number of clusters need to be inserted from the polynomial regressions */

********************************************************************************
*************** TABLE A15: Robustness Checks  **********************************
********************************************************************************

use "base_main.dta", clear 

global education "yrs_edu Uni Tec Sec2"	
global runnersup "runnersup_Prim2 runnersup_Sec2 runnersup_Tec runnersup_Uni runnersup_age runnersup_female runnersup_nationalparty runnersup_work_public runnersup_work_private runnersup_yrs_elected runnersup_yrs_partyoffice runnersup_yrs_mayor" 

* Winner flag: ELEGIDO=="SI" for 2006/2010/2014, winner==1 for 2002
gen election_winner=(ELEGIDO=="SI" & inlist(year,2006,2010,2014)) | (winner==1 & year==2002)

* Incumbent = the same person's previous observation (ordered by year) was a
* win; rows with a missing person_id are never flagged
bysort person_id (year): gen incumbent=1 if election_winner[_n-1]==1 & person_id!=.

*- Treatment variable:
	* 1 when the lagged recall vote share exceeds one half, 0 otherwise
	gen treat_1=(voterecall_lag>.5) if voterecall_lag!=.
	tab treat_1
	
*- Polynomials:	
	gen running_a2=voterecall_lag*voterecall_lag
	gen running_a3=voterecall_lag*voterecall_lag*voterecall_lag
	gen running_a4=voterecall_lag*voterecall_lag*voterecall_lag*voterecall_lag

*- Defining optimal bandwidths (Imbens-Kalyanaraman 2012): (excluding re-running incumbents)
	* Hard-coded half-widths -> bw_below_* / bw_above_*
	local h_yrs_edu 0.1370557353603857
	local h_Uni 0.1673815734300663
	local h_Tec 0.1305954066702182
	local h_Sec2 0.1367934139266808

	foreach y of global education {
		gen bw_below_`y'=0.5-`h_`y''
	}
	foreach y of global education {
		gen bw_above_`y'=0.5+`h_`y''
	}
	
*- Defining optimal bandwidths (Imbens-Kalyanaraman 2012): (including re-running incumbents)
	* Hard-coded half-widths -> bw_below_n_* / bw_above_n_*
	local hn_yrs_edu 0.137498229198215
	local hn_Uni 0.1577362629911949
	local hn_Tec 0.118662983718237
	local hn_Sec2 0.1376469624331328

	foreach y of global education {
		gen bw_below_n_`y'=0.5-`hn_`y''
	}
	foreach y of global education {
		gen bw_above_n_`y'=0.5+`hn_`y''
	}

*- Table

	foreach var of varlist $education {	

		* Windows: the first uses the bandwidths computed without re-running
		* incumbents, the second the bandwidths computed with them
		local window_excl "voterecall_lag>=bw_below_`var' & voterecall_lag<=bw_above_`var'"
		local window_incl "voterecall_lag>=bw_below_n_`var' & voterecall_lag<=bw_above_n_`var'"

		*- PANEL A: Dropping re-running Incumbents
		rd `var' treat_1 voterecall_lag if incumbent!=1, z0(0.5) mbw(100) cluster(district_year) 
			
			*-- Linear Polynomial Regression (for observations and mean dep.)
			reg `var' treat_1 voterecall_lag if `window_excl' & incumbent!=1, cluster(district_year) 
			capture drop n_`var'
			di "Observations and mean dep."
			gen n_`var'=e(sample)
			sum `var' if n_`var'==1		
			di "Number of clusters"
			di e(N_clust) 

		*- PANEL B: Controlling for Characteristics of Runners-up
		rd `var' treat_1 voterecall_lag, z0(0.5) mbw(100) covar($runnersup) cluster(district_year) 	
		
			*-- Linear Polynomial Regression (for observations and mean dep.)
			reg `var' treat_1 voterecall_lag $runnersup if `window_incl', cluster(district_year) 
			capture drop n_`var'
			di "Observations and mean dep."
			gen n_`var'=e(sample)
			sum `var' if n_`var'==1		
			di "Number of clusters"
			di e(N_clust) 
	}

/* NOTE: The number of observations, the mean of the dependent variable and the
number of clusters need to be inserted from the polynomial regressions */

********************************************************************************
**** TABLE A16: Accountability and Winners' Characteristics  *******************
********************************************************************************

use "base_main.dta", clear 

global experience "yrs_elected yrs_mayor yrs_partyoffice nationalparty"
global characteristics "work_public work_private age female"	

*- Sample of winners
	* Winner flag: ELEGIDO=="SI" for 2006/2010/2014, winner==1 for 2002
	gen election_winner=(ELEGIDO=="SI" & inlist(year,2006,2010,2014)) | (winner==1 & year==2002)
	keep if election_winner==1

*- Treatment variable:
	* 1 when the lagged recall vote share exceeds one half, 0 otherwise
	gen treat_1=(voterecall_lag>.5) if voterecall_lag!=.
	tab treat_1
	
*- Polynomials:	
	gen running_a2=voterecall_lag*voterecall_lag
	gen running_a3=voterecall_lag*voterecall_lag*voterecall_lag
	gen running_a4=voterecall_lag*voterecall_lag*voterecall_lag*voterecall_lag

*- Defining optimal bandwidths (Imbens-Kalyanaraman 2012):
	* Hard-coded half-widths, one per outcome; the window is 0.5 -/+ half-width
	local h_yrs_elected 0.1404759046709425
	local h_yrs_partyoffice 0.1446429774879379
	local h_yrs_mayor 0.2564143974000668
	local h_age 0.1303550088922776
	local h_female 0.1726929563729124
	local h_nationalparty 0.0996461560501708
	local h_work_public 0.1445249706777558
	local h_work_private 0.2216536371315442

	local outcomes "yrs_elected yrs_partyoffice yrs_mayor age female nationalparty work_public work_private"
	foreach y of local outcomes {
		gen bw_below_`y'=0.5-`h_`y''
	}
	foreach y of local outcomes {
		gen bw_above_`y'=0.5+`h_`y''
	}
	
*- Table
	
	foreach var of varlist $experience $characteristics {	
	
		*- Local Linear Regression
		rd `var' treat_1 voterecall_lag, z0(0.5) mbw(100) 
		
		*- Linear Polynomial Regression (for observations and mean dep.)
		local window "bw_below_`var'<=voterecall_lag & bw_above_`var'>=voterecall_lag"
		reg `var' treat_1 voterecall_lag if `window', vce(robust)
			di "Observations and mean dep."
			gen n_`var'=e(sample)
			sum `var' if n_`var'==1					
	}	

/* NOTE: The number of observations and the mean of the dependent variable need
to be inserted from the polynomial regressions (these regressions are not
clustered, so no cluster count is reported here) */

log close



